# __author__ = 'tianfuzneg'
# !/usr/bin/python
# -*- coding:utf-8 -*-

########################################################################################
# 2022.0927
# check parameters
########################################################################################
import os
import shutil

# External tools and reference data (absolute paths on the cluster file system).
samtools = "/data/fs01/biosoft/samtools-1.9/samtools"
cnv_facets = "/data/fs01/wangzf/software/cnv_facets/cnv_facets.R"
# Alternative dbSNP file, currently disabled:
# dbsnp = "/data/fs01/wangzf/nanopore/ref/dbsnp_146.hg38.vcf.gz"
dbsnp = "/data/fs01/wangzf/nanopore/ref/common_all_20180418.vcf"

# Project directories; all CNV results are written below cnv_output_dir.
work_dir = "/data/fs09/wangzf/nanopore/ztf/HCC/WGS"
program_dir = "/data/fs09/wangzf/nanopore/ztf/HCC/program"
cnv_output_dir = os.path.join(work_dir, 'CNV_FACETS_3.0')

# Sample labels grouped by patient. A tuple of pairs (not a dict) keeps the
# order explicit, so somatic_list comes out in exactly the order listed here.
_SAMPLE_LABELS_BY_PATIENT = (
    ('HCC8', ('N1', 'N3', 'T1', 'T2', 'T3', 'T4', 'T5')),
    ('HCC9', ('N1', 'N3', 'T1', 'T2', 'T3', 'T4', 'T5')),
    ('HCC10', ('N1', 'N3', 'N5', 'T1', 'T2', 'T3', 'T4', 'T5', 'T6', 'T7')),
    ('HCC12', ('N1', 'N3', 'T1', 'T2', 'T3', 'T4', 'T5')),
    ('HCC13', ('N1', 'T1', 'T2', 'T3', 'T4', 'T5')),
)

# Flat list of sample IDs in the form "<patient>_<label>", e.g. "HCC8_N1".
somatic_list = [
    '%s_%s' % (patient, label)
    for patient, labels in _SAMPLE_LABELS_BY_PATIENT
    for label in labels
]
########################################################################################
# Option 1: BAM & VCF input
# Settings for this run: new dbSNP file, lower minimum depth (--depth), custom --cval
########################################################################################
# Write a copy of the dbSNP VCF whose record lines carry a "chr" prefix on the
# chromosome column. Header lines (starting with "#") are copied unchanged.
dbsnp_chrom = "/data/fs01/wangzf/nanopore/ref/common_all_20180418.chrom.vcf"
# The input is opened first so a missing or unreadable source fails before the
# output file is created or truncated.
with open(dbsnp, 'r') as f, open(dbsnp_chrom, 'w') as out:
    for line in f:
        # Blank lines are passed through as-is; prefixing them would create a
        # bogus "chr" record. No per-line flush: the file object buffers writes
        # and is flushed when the `with` block closes it.
        if line.startswith("#") or not line.strip():
            out.write(line)
        else:
            out.write("chr" + line)
# NOTE(review): only record lines are renamed. Any "##contig=<ID=...>" header
# lines keep their original names, and a contig called "MT" would become
# "chrMT" rather than "chrM" -- confirm against the BAM reference naming.

# The compressed, indexed file below is NOT produced by this script. Create it
# by hand before the generated jobs run:
# bgzip -c common_all_20180418.chrom.vcf > common_all_20180418.chrom.vcf.gz
# tabix -p vcf common_all_20180418.chrom.vcf.gz
dbsnp_gz = "/data/fs01/wangzf/nanopore/ref/common_all_20180418.chrom.vcf.gz"

# Write one cnv_facets job script per sample into a fresh per-sample output
# directory. Each sample is paired with the "<patient>_WBC" BAM of its patient.
bwa_output_dir = os.path.join(work_dir, 'BWA')
for sample_id in somatic_list:
    # Start from an empty output directory; earlier results for this sample
    # are deliberately discarded.
    cnv_output_dir_sample = os.path.join(cnv_output_dir, sample_id)
    if os.path.isdir(cnv_output_dir_sample):
        # Remove in-process instead of passing an unquoted path to `rm -rf`.
        shutil.rmtree(cnv_output_dir_sample)
    elif os.path.exists(cnv_output_dir_sample):
        os.remove(cnv_output_dir_sample)
    os.makedirs(cnv_output_dir_sample)

    # bam configure
    # Match on the exact patient ID (text before the first "_") rather than a
    # substring, so an ID such as "HCC130" could never be taken for "HCC13".
    patient_id = sample_id.split('_')[0]
    sample_id_normal = patient_id + '_WBC'

    # Default: tumour and normal BAMs both sit under BWA/ and carry ".SM".
    tumor_suffix = ".SM"
    normal_suffix = ".SM"
    normal_root = bwa_output_dir
    if patient_id == "HCC13":
        # HCC13 BAM names have no ".SM" part, for tumour and normal alike.
        tumor_suffix = ""
        normal_suffix = ""
    elif patient_id == "HCC10":
        # The HCC10 normal BAM is in its own directory and has no ".SM" part.
        normal_suffix = ""
        normal_root = os.path.join(work_dir, 'BWA_HCC10_WBC')

    tumor_bam = os.path.join(
        bwa_output_dir, sample_id, "GATK",
        "%s_BWA_merge_sorted_markdup_BQSR%s.bam" % (sample_id, tumor_suffix))
    blood_bam = os.path.join(
        normal_root, sample_id_normal, "GATK",
        "%s_BWA_merge_sorted_markdup_BQSR%s.bam" % (sample_id_normal, normal_suffix))

    # script
    script_pip = os.path.join(cnv_output_dir_sample, "%s_cnv_facets_Option1.sh" % sample_id)
    prefix_o = os.path.join(cnv_output_dir_sample, sample_id)
    cnv_command = (
        "{cnv_facets} -t {tumour} -n {normal} -vcf {snps} --snp-nprocs 30 "
        "--depth 15 4000 --cval 25 400 --nbhd-snp 500 -o {out}"
    ).format(cnv_facets=cnv_facets, tumour=tumor_bam, normal=blood_bam,
             snps=dbsnp_gz, out=prefix_o)
    script_lines = [
        "#! /bin/bash\n",
        # Put the anaconda install first on PATH, then activate the
        # cnv_facets environment.
        'export PATH="/data/fs01/wangzf/software/anaconda3/bin:$PATH" \n',
        "source activate cnv_facets \n",
        'echo "$(date) 1. Option 1: BAM & VCF input Start: %s" \n' % sample_id,
        cnv_command + "\n",
        'echo "$(date) 1. Option 1: BAM & VCF input Finish: %s" \n' % sample_id,
    ]
    with open(script_pip, 'w') as out:
        out.writelines(script_lines)

# Submit every generated job script with qsub, spreading the jobs round-robin
# over the hosts PMC-<ID> listed in IDs.
IDs = [40, 41, 42, 46, 47, 48, 49, 50]
for job_index, sampleid in enumerate(somatic_list):
    cnv_output_dir_sample = os.path.join(cnv_output_dir, sampleid)
    script_pip = os.path.join(cnv_output_dir_sample, "%s_cnv_facets_Option1.sh" % sampleid)

    # Derive the log names from the script extension only; a plain
    # str.replace('.sh', ...) would also rewrite ".sh" anywhere else in the path.
    script_root = os.path.splitext(script_pip)[0]
    stdout = script_root + '.o'
    stderr = script_root + '.e'
    # Clear logs left by an earlier submission of the same script.
    for std in (stdout, stderr):
        if os.path.exists(std):
            os.remove(std)

    # Modulo keeps the host index valid for any length of IDs, with no
    # hard-coded upper bound to keep in sync with the list.
    server = IDs[job_index % len(IDs)]
    qsub_command = (
        'qsub -l hostname=PMC-{server} -S /bin/bash -o {out} -e {err} -N {name} -cwd {script}'.format(
            server=server, out=stdout, err=stderr, name="%s_cnv" % sampleid, script=script_pip))
    status = os.system(qsub_command)
    if status != 0:
        # Report the failed submission but keep going with the other samples.
        print("WARNING: qsub failed for %s (exit status %s)" % (sampleid, status))
